{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SVM Learning"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from coherence import load_coherence_dataset\n",
    "\n",
    "coherence_ds = load_coherence_dataset()\n",
    "X = coherence_ds.data\n",
    "y = coherence_ds.target"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train-Test Split"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Before we construct out model pipeline, we divide the dataset into separate a separate **training** dataset (75% of the data) and a separate **test** dataset (25% of the data):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.cross_validation import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## K-fold Cross Validation and HyperParameter Tuning"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Tuning hyperparameters via Grid Search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.grid_search import GridSearchCV\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.pipeline import Pipeline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Setting HyperParameters to tune"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]\n",
    "param_grid = [# Parameters for Linear Kernel \n",
    "              {'clf__C': param_range,\n",
    "               'clf__kernel': ['linear']}, \n",
    "              # Parameter for RBF Kernel\n",
    "              {'clf__C': param_range,\n",
    "               'clf__gamma': param_range,\n",
    "               'clf__kernel': ['rbf']},\n",
    "              # Parameter for Polynomial Kernel\n",
    "              {'clf__C': param_range,\n",
    "               'clf__degree': [2, 3, 5, 8, 16],\n",
    "               'clf__coef0': [0, 1],\n",
    "               'clf__kernel': ['poly']},\n",
    "            ]\n",
    "\n",
    "# So far, the `Pipeline` is not actually needed, but\n",
    "# this code is ready in case multiple (features selection)\n",
    "# steps would be considered for inclusion\n",
    "pipe_svc = Pipeline([('clf', SVC(random_state=1))]) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Show HyperParameters Grid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hyperparamters for Kernel:  Poly\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.0001, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.0001, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.0001, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.0001, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.0001, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.0001, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.0001, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.0001, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.0001, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.0001, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.001, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.001, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.001, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.001, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.001, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.001, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.001, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.001, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.001, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.001, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.01, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.01, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.01, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.01, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.01, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.01, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.01, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.01, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.01, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.01, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.1, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.1, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.1, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.1, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 0.1, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.1, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.1, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.1, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.1, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 0.1, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 1.0, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 1.0, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 1.0, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 1.0, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 1.0, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 1.0, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 1.0, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 1.0, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 1.0, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 1.0, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 10.0, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 10.0, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 10.0, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 10.0, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 10.0, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 10.0, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 10.0, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 10.0, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 10.0, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 10.0, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 100.0, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 100.0, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 100.0, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 100.0, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 100.0, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 100.0, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 100.0, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 100.0, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 100.0, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 100.0, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 1000.0, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 1000.0, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 1000.0, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 1000.0, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 0, 'clf__kernel': 'poly', 'clf__C': 1000.0, 'clf__degree': 16}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 1000.0, 'clf__degree': 2}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 1000.0, 'clf__degree': 3}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 1000.0, 'clf__degree': 5}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 1000.0, 'clf__degree': 8}\n",
      "\t {'clf__coef0': 1, 'clf__kernel': 'poly', 'clf__C': 1000.0, 'clf__degree': 16}\n",
      "--------------------------------------------------------------------------------\n",
      "Hyperparamters for Kernel:  Rbf\n",
      "\t {'clf__gamma': 0.0001, 'clf__C': 0.0001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.001, 'clf__C': 0.0001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.01, 'clf__C': 0.0001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.1, 'clf__C': 0.0001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1.0, 'clf__C': 0.0001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 10.0, 'clf__C': 0.0001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 100.0, 'clf__C': 0.0001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1000.0, 'clf__C': 0.0001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.0001, 'clf__C': 0.001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.001, 'clf__C': 0.001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.01, 'clf__C': 0.001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.1, 'clf__C': 0.001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1.0, 'clf__C': 0.001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 10.0, 'clf__C': 0.001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 100.0, 'clf__C': 0.001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1000.0, 'clf__C': 0.001, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.0001, 'clf__C': 0.01, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.001, 'clf__C': 0.01, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.01, 'clf__C': 0.01, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.1, 'clf__C': 0.01, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1.0, 'clf__C': 0.01, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 10.0, 'clf__C': 0.01, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 100.0, 'clf__C': 0.01, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1000.0, 'clf__C': 0.01, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.0001, 'clf__C': 0.1, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.001, 'clf__C': 0.1, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.01, 'clf__C': 0.1, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.1, 'clf__C': 0.1, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1.0, 'clf__C': 0.1, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 10.0, 'clf__C': 0.1, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 100.0, 'clf__C': 0.1, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1000.0, 'clf__C': 0.1, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.0001, 'clf__C': 1.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.001, 'clf__C': 1.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.01, 'clf__C': 1.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.1, 'clf__C': 1.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1.0, 'clf__C': 1.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 10.0, 'clf__C': 1.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 100.0, 'clf__C': 1.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1000.0, 'clf__C': 1.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.0001, 'clf__C': 10.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.001, 'clf__C': 10.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.01, 'clf__C': 10.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.1, 'clf__C': 10.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1.0, 'clf__C': 10.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 10.0, 'clf__C': 10.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 100.0, 'clf__C': 10.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1000.0, 'clf__C': 10.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.0001, 'clf__C': 100.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.001, 'clf__C': 100.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.01, 'clf__C': 100.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.1, 'clf__C': 100.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1.0, 'clf__C': 100.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 10.0, 'clf__C': 100.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 100.0, 'clf__C': 100.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1000.0, 'clf__C': 100.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.0001, 'clf__C': 1000.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.001, 'clf__C': 1000.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.01, 'clf__C': 1000.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 0.1, 'clf__C': 1000.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1.0, 'clf__C': 1000.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 10.0, 'clf__C': 1000.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 100.0, 'clf__C': 1000.0, 'clf__kernel': 'rbf'}\n",
      "\t {'clf__gamma': 1000.0, 'clf__C': 1000.0, 'clf__kernel': 'rbf'}\n",
      "--------------------------------------------------------------------------------\n",
      "Hyperparamters for Kernel:  Linear\n",
      "\t {'clf__kernel': 'linear', 'clf__C': 0.0001}\n",
      "\t {'clf__kernel': 'linear', 'clf__C': 0.001}\n",
      "\t {'clf__kernel': 'linear', 'clf__C': 0.01}\n",
      "\t {'clf__kernel': 'linear', 'clf__C': 0.1}\n",
      "\t {'clf__kernel': 'linear', 'clf__C': 1.0}\n",
      "\t {'clf__kernel': 'linear', 'clf__C': 10.0}\n",
      "\t {'clf__kernel': 'linear', 'clf__C': 100.0}\n",
      "\t {'clf__kernel': 'linear', 'clf__C': 1000.0}\n",
      "--------------------------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "from sklearn.grid_search import ParameterGrid\n",
    "hyperp_grid = list(ParameterGrid(param_grid))\n",
    "\n",
    "# Pretty Printing Grid\n",
    "kernel_combinations = dict()\n",
    "for hp in hyperp_grid:\n",
    "    kernel = hp['clf__kernel']\n",
    "    kernel_combinations.setdefault(kernel, list())\n",
    "    kernel_combinations[kernel].append(hp)\n",
    "    \n",
    "for kernel in kernel_combinations:\n",
    "    print('Hyperparamters for Kernel: ', kernel.title())\n",
    "    for hp in kernel_combinations[kernel]:\n",
    "        print('\\t', hp)\n",
    "    print('-'*80)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Stratified K-Fold Cross Validation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from sklearn.cross_validation import cross_val_score\n",
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "def classify_samples(pipe_svc, param_grid, score, scoring='accuracy', **extra_score_params):\n",
    "    \"\"\"\n",
    "    \"\"\"\n",
    "    \n",
    "    print('-'*80)\n",
    "    print('PERFORMING MACHINE LEARNING ANALYSIS WITH SCORING METRIC: ', scoring)\n",
    "    \n",
    "    # GridSearch CV\n",
    "    gs = GridSearchCV(estimator=pipe_svc,\n",
    "                      param_grid=param_grid,\n",
    "                      scoring=scoring,\n",
    "                      cv=10, n_jobs=-1)\n",
    "    \n",
    "    gs = gs.fit(X_train, y_train)  # Fit the GridSearchCV Estimator \n",
    "\n",
    "    # Printing training results \n",
    "    print('Best Score: ', gs.best_score_)\n",
    "    print('Best Parameters: ', gs.best_params_)\n",
    "    #print('Grid Scores: ', gs.grid_scores_)\n",
    "    \n",
    "    # Predict with Best Estimator\n",
    "    clf = gs.best_estimator_\n",
    "    clf.fit(X_train, y_train)\n",
    "    y_pred = clf.predict(X_test)\n",
    "\n",
    "    print('Mean Accuracy on Test set: %.3f' %  (clf.score(X_test, y_test)))\n",
    "    \n",
    "    print('Calculating Score on Prediction: ', score(y_test, y_pred, **extra_score_params))\n",
    "    \n",
    "    #Confusion Matrix\n",
    "    cm = confusion_matrix(y_test, y_pred)\n",
    "    print(cm)\n",
    "    \n",
    "    scores = cross_val_score(gs, X, y, scoring=scoring, cv=10, n_jobs=-1)\n",
    "    print('All CV Scores')\n",
    "    for i, score in enumerate(scores):\n",
    "        print(i, '): ', score)\n",
    "\n",
    "    print('CV %s : %.3f +/- %.3f' % (scoring, np.mean(scores), np.std(scores)))\n",
    "    \n",
    "    print('-'*80)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------------------------------------\n",
      "PERFORMING MACHINE LEARNING ANALYSIS WITH SCORING METRIC:  accuracy\n",
      "Best Score:  0.832407407407\n",
      "Best Parameters:  {'clf__gamma': 1.0, 'clf__C': 1000.0, 'clf__kernel': 'rbf'}\n",
      "Mean Accuracy on Test set: 0.838\n",
      "Calculating Score on Prediction:  0.837725381415\n",
      "[[217  73]\n",
      " [ 44 387]]\n",
      "All CV Scores\n",
      "0 ):  0.560553633218\n",
      "1 ):  0.602076124567\n",
      "2 ):  0.515570934256\n",
      "3 ):  0.565972222222\n",
      "4 ):  0.645833333333\n",
      "5 ):  0.770833333333\n",
      "6 ):  0.826388888889\n",
      "7 ):  0.756944444444\n",
      "8 ):  0.662020905923\n",
      "9 ):  0.595818815331\n",
      "CV accuracy : 0.650 +/- 0.098\n",
      "--------------------------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# Accuracy (replication)\n",
    "from sklearn.metrics import accuracy_score\n",
    "classify_samples(pipe_svc, param_grid, accuracy_score, scoring='accuracy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------------------------------------\n",
      "PERFORMING MACHINE LEARNING ANALYSIS WITH SCORING METRIC:  f1\n",
      "Best Score:  0.86733059081\n",
      "Best Parameters:  {'clf__gamma': 1.0, 'clf__C': 1.0, 'clf__kernel': 'rbf'}\n",
      "Mean Accuracy on Test set: 0.835\n",
      "Calculating Score on Prediction:  0.869945355191\n",
      "[[204  86]\n",
      " [ 33 398]]\n",
      "All CV Scores\n",
      "0 ):  0.756043956044\n",
      "1 ):  0.808510638298\n",
      "2 ):  0.819047619048\n",
      "3 ):  0.771362586605\n",
      "4 ):  0.744444444444\n",
      "5 ):  0.810426540284\n",
      "6 ):  0.840294840295\n",
      "7 ):  0.808510638298\n",
      "8 ):  0.779043280182\n",
      "9 ):  0.74672489083\n",
      "CV f1 : 0.788 +/- 0.032\n",
      "--------------------------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# F1\n",
    "from sklearn.metrics import f1_score\n",
    "classify_samples(pipe_svc, param_grid, f1_score, scoring='f1', average='binary')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------------------------------------\n",
      "PERFORMING MACHINE LEARNING ANALYSIS WITH SCORING METRIC:  f1\n",
      "Best Score:  0.86733059081\n",
      "Best Parameters:  {'clf__gamma': 1.0, 'clf__C': 1.0, 'clf__kernel': 'rbf'}\n",
      "Mean Accuracy on Test set: 0.835\n",
      "Calculating Score on Prediction:  0.869945355191\n",
      "[[204  86]\n",
      " [ 33 398]]\n",
      "All CV Scores\n",
      "0 ):  0.756043956044\n",
      "1 ):  0.808510638298\n",
      "2 ):  0.819047619048\n",
      "3 ):  0.771362586605\n",
      "4 ):  0.744444444444\n",
      "5 ):  0.810426540284\n",
      "6 ):  0.840294840295\n",
      "7 ):  0.808510638298\n",
      "8 ):  0.779043280182\n",
      "9 ):  0.74672489083\n",
      "CV f1 : 0.788 +/- 0.032\n",
      "--------------------------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# F1 Macro-averaged\n",
    "classify_samples(pipe_svc, param_grid, f1_score, scoring='f1', average='macro')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------------------------------------\n",
      "PERFORMING MACHINE LEARNING ANALYSIS WITH SCORING METRIC:  f1\n",
      "Best Score:  0.86733059081\n",
      "Best Parameters:  {'clf__gamma': 1.0, 'clf__C': 1.0, 'clf__kernel': 'rbf'}\n",
      "Mean Accuracy on Test set: 0.835\n",
      "Calculating Score on Prediction:  0.869945355191\n",
      "[[204  86]\n",
      " [ 33 398]]\n",
      "All CV Scores\n",
      "0 ):  0.756043956044\n",
      "1 ):  0.808510638298\n",
      "2 ):  0.819047619048\n",
      "3 ):  0.771362586605\n",
      "4 ):  0.744444444444\n",
      "5 ):  0.810426540284\n",
      "6 ):  0.840294840295\n",
      "7 ):  0.808510638298\n",
      "8 ):  0.779043280182\n",
      "9 ):  0.74672489083\n",
      "CV f1 : 0.788 +/- 0.032\n",
      "--------------------------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# F1 Weighted Average\n",
    "classify_samples(pipe_svc, param_grid, f1_score, scoring='f1', average='weighted')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------------------------------------\n",
      "PERFORMING MACHINE LEARNING ANALYSIS WITH SCORING METRIC:  roc_auc\n",
      "Best Score:  0.899145085903\n",
      "Best Parameters:  {'clf__gamma': 1.0, 'clf__C': 10.0, 'clf__kernel': 'rbf'}\n",
      "Mean Accuracy on Test set: 0.834\n",
      "Calculating Score on Prediction:  0.818485478838\n",
      "[[215  75]\n",
      " [ 45 386]]\n",
      "All CV Scores\n",
      "0 ):  0.642243092824\n",
      "1 ):  0.750248459551\n",
      "2 ):  0.618366129994\n",
      "3 ):  0.596841105613\n",
      "4 ):  0.805668016194\n",
      "5 ):  0.963112910481\n",
      "6 ):  0.930974159044\n",
      "7 ):  0.835457589844\n",
      "8 ):  0.96284533172\n",
      "9 ):  0.509956644485\n",
      "CV roc_auc : 0.762 +/- 0.156\n",
      "--------------------------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# ROC AUC Score\n",
    "from sklearn.metrics import roc_auc_score\n",
    "classify_samples(pipe_svc, param_grid, roc_auc_score, scoring='roc_auc', average='macro')  # default"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------------------------------------\n",
      "PERFORMING MACHINE LEARNING ANALYSIS WITH SCORING METRIC:  roc_auc\n",
      "Best Score:  0.899145085903\n",
      "Best Parameters:  {'clf__gamma': 1.0, 'clf__C': 10.0, 'clf__kernel': 'rbf'}\n",
      "Mean Accuracy on Test set: 0.834\n",
      "Calculating Score on Prediction:  0.818485478838\n",
      "[[215  75]\n",
      " [ 45 386]]\n",
      "All CV Scores\n",
      "0 ):  0.642243092824\n",
      "1 ):  0.750248459551\n",
      "2 ):  0.618366129994\n",
      "3 ):  0.596841105613\n",
      "4 ):  0.805668016194\n",
      "5 ):  0.963112910481\n",
      "6 ):  0.930974159044\n",
      "7 ):  0.835457589844\n",
      "8 ):  0.96284533172\n",
      "9 ):  0.509956644485\n",
      "CV roc_auc : 0.762 +/- 0.156\n",
      "--------------------------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# ROC AUC Score - Weighted Average\n",
    "classify_samples(pipe_svc, param_grid, roc_auc_score, scoring='roc_auc', average='weighted')  # Roc AUC Weighted"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
